Table of Contents

gold plotting


In [ ]:
from planet4 import io, markings, plotting, catalog_production
import matplotlib as mpl

In [ ]:
import seaborn as sns
sns.set_context('paper')
sns.set_palette('bright', color_codes=True)

In [ ]:
def get_gold_ids(person, path=None):
    """Read one gold-data ID list from ``<person>.txt``.

    Parameters
    ----------
    person : {"GP", "MES", "KMA", "common_gold_data"}
        Stem of the text file to read.
    path : str or Path, optional
        Folder that holds the gold-data text files. When omitted, the
        hard-coded ``p4_paper1/gold_data`` folder is used.

    Returns
    -------
    pd.Series
        The file content, read without a header row. A Series is returned
        when the file has a single column.
    """
    if path is None:
        path = "/Users/klay6683/Dropbox/Documents/latex_docs/p4_paper1/gold_data"
    # `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
    # pandas 2.0; squeezing the column axis afterwards yields the same Series.
    table = pd.read_csv(Path(path) / f"{person}.txt", header=None)
    return table.squeeze("columns")

class DataPipe:
    """Gather expert ("gold") markings and catalog results for gold image IDs.

    The catalog side is read through ``io.PathManager`` from the save folder
    of a ``catalog_production.ReleaseManager``; the marking side is read
    through ``io.DBManager``.
    """

    # expert key -> (user name used to filter `user_name`, short name).
    # Only the first tuple element is used inside this class.
    gold_names = dict(GP=("Portyankina", 'anya'), MES=('mschwamb','meg'), KMA=('michaelaye', 'michael'))

    def __init__(self, version='v1.0'):
        """Create the release manager for `version` and a database manager."""
        self.rm = catalog_production.ReleaseManager(version)
        self.db = io.DBManager()

    @property
    def anyas_ids(self):
        """IDs from the "GP" gold list."""
        return get_gold_ids('GP')

    @property
    def megs_ids(self):
        """IDs from the "MES" gold list."""
        return get_gold_ids('MES')

    @property
    def michaels_ids(self):
        """IDs from the "KMA" gold list."""
        return get_gold_ids('KMA')

    @property
    def common_ids(self):
        """IDs from the "common_gold_data" list."""
        return get_gold_ids('common_gold_data')

    def get_catalog_fans_for_id(self, id_):
        """Return the final fan table for `id_`, or None if its file is missing."""
        pm = io.PathManager(id_=id_, datapath=self.rm.savefolder)
        return pm.final_fandf if pm.final_fanfile.exists() else None

    def get_catalog_blotches_for_id(self, id_):
        """Return the final blotch table for `id_`, or None if its file is missing."""
        pm = io.PathManager(id_=id_, datapath=self.rm.savefolder)
        return pm.final_blotchdf if pm.final_blotchfile.exists() else None

    def get_catalog_data_for_id(self, id_):
        """Return fans and blotches of `id_` in one frame with a 'marking' column.

        Returns None when neither a fan nor a blotch file exists.
        """
        fans = self.get_catalog_fans_for_id(id_)
        blotches = self.get_catalog_blotches_for_id(id_)
        if fans is None and blotches is None:
            return None
        if fans is not None:
            fans['marking'] = 'fan'
        if blotches is not None:
            blotches['marking'] = 'blotch'
        # pd.concat silently drops a single None entry.
        return pd.concat([fans, blotches], ignore_index=True, sort=True)

    def get_catalog_data_for_ids(self, ids):
        """Return the concatenated catalog data of all `ids`.

        IDs without any catalog file are skipped. Returns None when no ID
        yields data (including an empty `ids`); previously that case raised
        inside ``pd.concat`` because the old ``data is not None`` check on a
        list was always true.
        """
        frames = [
            frame
            for frame in map(self.get_catalog_data_for_id, ids)
            if frame is not None
        ]
        if not frames:
            return None
        return pd.concat(frames, ignore_index=True, sort=True)

    def get_catalog_data_for_gold_member(self, name):
        """Return the catalog data for every ID in the gold list `name`.

        Delegates to `get_catalog_data_for_ids` so both entry points share the
        same handling of missing catalog files and the same column sorting.
        """
        return self.get_catalog_data_for_ids(get_gold_ids(name))

    def get_user_markings_for_id(self, user, id_):
        """Return the markings of `id_` whose `user_name` equals `user`."""
        data = self.db.get_image_id_markings(id_)
        return data.query('user_name == @user')

    def get_user_markings_for_ids(self, user, ids):
        """Return the markings made by `user` on all `ids` in one frame."""
        data = [self.get_user_markings_for_id(user, id_) for id_ in ids]
        return pd.concat(data, ignore_index=True, sort=True)

    def get_gold_markings_for_expert(self, expert_id):
        """Return an expert's markings on the IDs of that expert's own gold list.

        `expert_id` is a key of `gold_names`; it selects both the user name to
        filter on and the gold list to read.
        """
        names = self.gold_names[expert_id]
        ids = get_gold_ids(expert_id)
        return self.get_user_markings_for_ids(names[0], ids)

    def get_gold_markings_common(self):
        """Return the markings of all gold members on the common gold IDs."""
        data = [self.db.get_image_id_markings(id_).
                query('user_name in @markings.GOLD_MEMBERS') for id_ in self.common_ids]
        return pd.concat(data, ignore_index=True, sort=True)

In [ ]:
rm = catalog_production.ReleaseManager('v1.0')

In [ ]:
rm.savefolder

In [ ]:
pm = io.PathManager(id_='7xp', datapath=rm.savefolder)

In [ ]:
pm.final_fanfile

In [ ]:
pipe = DataPipe()

In [ ]:
# Expert markings keyed by expert id; the shared list is stored under 'common'.
gold_data = {}
for expert in pipe.gold_names:
    print(expert)  # progress output, one line per expert
    gold_data[expert] = pipe.get_gold_markings_for_expert(expert)
gold_data.update(common=pipe.get_gold_markings_common())

In [ ]:
# Catalog results for each expert's gold list, keyed by expert id.
catalog_data = dict()
for expert in pipe.gold_names:
    print(expert)  # progress output, one line per expert
    ids = get_gold_ids(expert)
    catalog_data.update({expert: pipe.get_catalog_data_for_ids(ids)})

In [ ]:
catalog_data['common'] = pipe.get_catalog_data_for_ids(get_gold_ids('common_gold_data'))

In [ ]:
# Compare, per expert, the number of tiles that carry markings with the
# length of that expert's gold list.
for expert in pipe.gold_names:
    marked_tiles = gold_data[expert].image_id.nunique()
    listed_tiles = len(get_gold_ids(expert))
    print(expert)
    print(marked_tiles)
    print(listed_tiles)

In [ ]:
# Per expert: number of distinct values in every column of the gold markings.
for expert in pipe.gold_names:
    unique_counts = gold_data[expert].nunique()
    print(unique_counts)

In [ ]:
%matplotlib ipympl
plt.style.use('tableau-colorblind10')

In [ ]:
plt.rcParams['axes.grid'] = False
plt.rcParams['axes.grid.which'] = 'both'

In [ ]:
# One panel per expert: histogram of the number of markings (fans and
# blotches together) per tile, expert markings vs. catalog entries.
bins = np.arange(0, 175, 5)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert, ax in zip(pipe.gold_names, axes):
    ax.grid(which='major', lw=0.5)
    expdata = gold_data[expert].groupby('image_id').size()
    catdata = catalog_data[expert].groupby('image_id').size()
    _ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
    ax.legend()
    ax.set_ylabel("# of tiles")
# Label only the bottom panel, addressed explicitly instead of through the
# loop variable left over from the last iteration.
axes[-1].set_xlabel('# of fans+blotches per Planet Four tile')
fig.suptitle('Expert vs Catalog object identification frequency')
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_and_blotches_histos.pdf',
            dpi=200, bbox_inches='tight')

In [ ]:
sns.set_context('notebook')

In [ ]:
# Common gold tiles: markings per tile for each expert and for the catalog.
bins = np.arange(0, 75, 5)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)
expert = 'common'
# Counts indexed by (user_name, image_id) for the experts, by image_id for the catalog.
expdata = gold_data[expert].groupby(['user_name', 'image_id']).size()
catdata = catalog_data[expert].groupby('image_id').size()
samples = [expdata[user] for user in ('Portyankina', 'mschwamb', 'michaelaye')]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of tiles")
axes.set_xlabel('# of fans+blotches per Planet Four tile')
axes.set_title("Common Expert data vs Catalog")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_and_blotches_histos_common.pdf',
    dpi=150,
    bbox_inches='tight',
)

In [ ]:
plt.close('all')

In [ ]:
# One panel per expert: histogram of the number of fans per tile,
# expert markings vs. catalog entries.
bins = np.arange(0, 85, 5)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert, ax in zip(pipe.gold_names, axes):
    ax.grid(which='major', lw=0.5)
    expdata = gold_data[expert].query('marking=="fan"').groupby('image_id').size()
    catdata = catalog_data[expert].query('marking=="fan"').groupby('image_id').size()
    _ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
    ax.legend()
    ax.set_ylabel("# of tiles")
# Label only the bottom panel, addressed explicitly instead of through the
# loop variable left over from the last iteration.
axes[-1].set_xlabel('# of fans per Planet Four tile')
fig.suptitle('Expert vs Catalog object identification frequency: Fans only')
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_histos.pdf',
            dpi=200, bbox_inches='tight')

In [ ]:
# Common gold tiles: fans per tile for each expert and for the catalog.
bins = np.arange(0, 60, 5)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)
expert = 'common'
# Counts indexed by (user_name, image_id) for the experts, by image_id for the catalog.
expdata = gold_data[expert].query('marking=="fan"').groupby(['user_name', 'image_id']).size()
catdata = catalog_data[expert].query('marking=="fan"').groupby('image_id').size()
samples = [expdata[user] for user in ('Portyankina', 'mschwamb', 'michaelaye')]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of tiles")
axes.set_xlabel('# of fans per Planet Four tile')
axes.set_title("Common Expert data vs Catalog: Fans only")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fans_histos_common.pdf',
    dpi=150,
    bbox_inches='tight',
)

In [ ]:
# One panel per expert: histogram of the number of blotches per tile,
# expert markings vs. catalog entries.
bins = np.arange(0, 85, 5)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert, ax in zip(pipe.gold_names, axes):
    ax.grid(which='major', lw=0.5)
    expdata = gold_data[expert].query('marking=="blotch"').groupby('image_id').size()
    catdata = catalog_data[expert].query('marking=="blotch"').groupby('image_id').size()
    _ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
    ax.legend()
    ax.set_ylabel("# of tiles")
# Label only the bottom panel, addressed explicitly instead of through the
# loop variable left over from the last iteration.
axes[-1].set_xlabel('# of blotches per Planet Four tile')
fig.suptitle('Expert vs Catalog object identification frequency: Blotches only')
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotches_histos.pdf',
            dpi=200, bbox_inches='tight')

In [ ]:
# Common gold tiles: blotches per tile for each expert and for the catalog.
bins = np.arange(0, 60, 5)
fig, axes = plt.subplots(nrows=1, figsize=(10, 2.5), sharex=False)
axes.grid(which='major', lw=0.5)
expert = 'common'
# Counts indexed by (user_name, image_id) for the experts, by image_id for the catalog.
expdata = gold_data[expert].query('marking=="blotch"').groupby(['user_name', 'image_id']).size()
catdata = catalog_data[expert].query('marking=="blotch"').groupby('image_id').size()
samples = [expdata[user] for user in ('Portyankina', 'mschwamb', 'michaelaye')]
samples.append(catdata)
_ = axes.hist(samples, bins=bins, log=True,
              label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of tiles")
axes.set_xlabel('# of blotches per Planet Four tile')
axes.set_title("Common Expert data vs Catalog: Blotches only")
fig.savefig(
    '/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotches_histos_common.pdf',
    dpi=150,
    bbox_inches='tight',
)

In [ ]:
plt.close('all')

In [ ]:
# One panel per expert: histogram of the `distance` column of all fans,
# expert markings vs. catalog entries.
bins = np.arange(0, 600, 30)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert, ax in zip(pipe.gold_names, axes):
    ax.grid(which='major', lw=0.5)
    expdata = gold_data[expert].query('marking=="fan"').distance
    catdata = catalog_data[expert].query('marking=="fan"').distance
    _ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
    ax.legend()
    ax.set_ylabel("# of fans")
# Label only the bottom panel, addressed explicitly instead of through the
# loop variable left over from the last iteration.
axes[-1].set_xlabel('Fan lengths [pixel]')
fig.suptitle('Fans lengths, expert vs catalog')
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fan_lengths.pdf',
            dpi=200, bbox_inches='tight')

In [ ]:
# Common gold tiles: histogram of the fan `distance` column for each expert
# and for the catalog.
bins = np.arange(0, 600, 30)
fig, axes = plt.subplots(nrows=1, figsize=(10,2.5), sharex=False)
axes.grid(which='major', lw=0.5)
expert='common'
# Group by the scalar key: grouping by the one-element list ['user_name'] and
# then calling get_group() with a scalar is deprecated in recent pandas.
expdata = gold_data[expert].query('marking=="fan"').groupby('user_name').distance
catdata = catalog_data[expert].query('marking=="fan"').distance
_ = axes.hist([expdata.get_group('Portyankina'), expdata.get_group('mschwamb'),
               expdata.get_group('michaelaye'), catdata], bins=bins, log=True, label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of fans")
axes.set_xlabel('Fan lengths [pixel]')
axes.set_title("Fan lengths, common expert data vs catalog")
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_fan_lengths_common.pdf',
            dpi=150, bbox_inches='tight')

In [ ]:
plt.close('all')

In [ ]:
from math import pi

# One panel per expert: histogram of blotch areas, computed as
# radius_1 * radius_2 * pi, expert markings vs. catalog entries.
# The first bin starts at 300, so smaller areas are not shown.
bins = np.arange(300, 120000, 5000)
fig, axes = plt.subplots(nrows=3, figsize=(10,7), sharex=False)
for expert, ax in zip(pipe.gold_names, axes):
    ax.grid(which='major', lw=0.5)
    expdata = gold_data[expert].query('marking=="blotch"')[['radius_1', 'radius_2']]
    expdata = expdata.radius_1*expdata.radius_2*pi
    catdata = catalog_data[expert].query('marking=="blotch"')[['radius_1', 'radius_2']]
    catdata = catdata.radius_1*catdata.radius_2*pi
    _ = ax.hist([expdata, catdata], bins=bins, log=True, label=[expert, 'catalog'])
    ax.legend()
    ax.set_ylabel("# of blotches")
# Label only the bottom panel, addressed explicitly instead of through the
# loop variable left over from the last iteration.
axes[-1].set_xlabel('Blotch area [pixel**2]')
fig.suptitle('Blotch area, expert vs catalog')
fig.subplots_adjust(top=0.95)
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotch_areas.pdf',
            dpi=200, bbox_inches='tight')

In [ ]:
from math import pi

# Common gold tiles: histogram of blotch areas for each expert and for the
# catalog. The first bin starts at 300, so smaller areas are not shown.
bins = np.arange(300, 80000, 5000)
fig, axes = plt.subplots(nrows=1, figsize=(10,2.5), sharex=False)
axes.grid(which='major', lw=0.5)
expert='common'
# Adds an 'area' column (radius_1 * radius_2 * pi) to both shared frames in place.
gold_data[expert]['area'] = gold_data[expert].radius_1*gold_data[expert].radius_2*pi
catalog_data[expert]['area'] = catalog_data[expert].radius_1*catalog_data[expert].radius_2*pi
# Group by the scalar key: grouping by the one-element list ['user_name'] and
# then calling get_group() with a scalar is deprecated in recent pandas.
expdata = gold_data[expert].query('marking=="blotch"').groupby('user_name').area
catdata = catalog_data[expert].query('marking=="blotch"').area
_ = axes.hist([expdata.get_group('Portyankina'), expdata.get_group('mschwamb'),
               expdata.get_group('michaelaye'), catdata], bins=bins, log=True, label=['GP', 'MES', 'KMA', 'catalog'])
axes.legend()
axes.set_ylabel("# of blotches")
axes.set_xlabel('Blotch area [pixel**2]')
axes.set_title("Blotch area, common expert data vs catalog")
fig.savefig('/Users/klay6683/Dropbox/src/p4_paper1/figures/gold_blotch_areas_common.pdf',
            dpi=150, bbox_inches='tight')

In [ ]:


In [ ]:
gold_ids = get_gold_ids('common_gold_data')

In [ ]:
def plot_gold(i):
    """Plot and save a 2x2 comparison figure for the i-th entry of `gold_ids`.

    Panels, in flattened order: the input tile, the markings of all users
    except the gold members, the catalog result drawn by
    `plotting.plot_finals`, and the gold members' markings.

    Parameters
    ----------
    i : int
        Index into the module-level `gold_ids`; also used, zero-padded to two
        digits, in the output file name.

    Notes
    -----
    Reads the module-level names `gold_ids` and `rm`. The figure is written to
    ``./plots/gold_plots/gold_data<i>.png`` and is not closed here.
    """
    id_ = gold_ids[i]
    p4img = markings.ImageID(id_)
    # create plot window
    fig, axarr = plt.subplots(ncols=2, nrows=2)
    axes = axarr.flatten()

    # remove pixel coord axes
    for panel in axes:
        panel.axis('off')

    # citizen stuff
    p4img.show_subframe(ax=axes[0])
    axes[0].set_title('Planet Four input tile')
    p4img.plot_fans(without_users=markings.GOLD_MEMBERS, ax=axes[1])
    p4img.plot_blotches(without_users=markings.GOLD_MEMBERS, ax=axes[1])
    axes[1].set_title('Citizen Markings')
    plotting.plot_finals(id_, datapath=rm.savefolder, ax=axes[2],
                         wind_pointer=True)
    axes[2].set_title('Catalog clustered data.')

    # gold stuff
    # NOTE(review): `gold_star_plotter` is not defined in the visible part of
    # this notebook — confirm where it comes from.
    gold_star_plotter(p4img, axes[3], kind='fan')
    gold_star_plotter(p4img, axes[3], kind='blotch')
    axes[3].set_title('Science team markings')
    fig.subplots_adjust(wspace=0.05, hspace=0.15)
    fig.suptitle(id_)
    path = Path("./plots/gold_plots")
    # parents=True: the intermediate ./plots folder may not exist yet.
    path.mkdir(parents=True, exist_ok=True)
    fig.savefig(path / f"gold_data{str(i).zfill(2)}.png",
                dpi=150, bbox_inches='tight')

In [ ]:
plot_gold(0)

In [ ]:
# Render the comparison figure for every common gold tile. Failures are
# tolerated (best effort) but reported, and figures are always closed so a
# failed plot does not leave open figures behind.
for i in range(len(gold_ids)):
    print(i)
    try:
        plot_gold(i)
    except Exception as err:
        # Exception, not a bare except: Ctrl-C must still stop the loop.
        print(f"plot_gold({i}) failed: {err!r}")
    finally:
        plt.close('all')

In [ ]:
db = io.DBManager()

In [ ]:
# For every common gold tile, collect the difference between the mean fan
# angle marked by the gold members and the mean fan angle in the catalog.
# Tiles without a catalog fan file, or with 3 or fewer catalog fans, are skipped.
# NOTE(review): these are plain arithmetic means of `angle`; if angles can wrap
# around (e.g. 359 vs 1 degrees) a circular mean would be needed — confirm.
bucket = []
ids = []
for id_ in gold_ids:
    golddata = db.get_image_id_markings(id_).query('user_name in @markings.GOLD_MEMBERS')
    gold_angle_mean = golddata.query('marking=="fan"').angle.mean()
    pm = io.PathManager(id_=id_, datapath=rm.savefolder)
    try:
        # Load the catalog fans once and reuse them for both the mean and the
        # length check, instead of accessing `final_fandf` twice.
        catalog_fans = pm.final_fandf
    except FileNotFoundError:
        continue
    catalog_angle_mean = catalog_fans.angle.mean()
    if len(catalog_fans) > 3:
        ids.append(id_)
        bucket.append(gold_angle_mean-catalog_angle_mean)

In [ ]:
bucket = np.array(bucket)
ids = np.array(ids)

In [ ]:
ids[bucket > 25]

In [ ]:
plotting.plot_finals('c0t', datapath=rm.savefolder)

In [ ]:
gold_ids = np.array(gold_ids)

In [ ]:
np.where(gold_ids=='APF0000c0t')

In [ ]:
gold_ids == 'APF00002aj'

In [ ]:
len(bucket)

In [ ]:
bucket=bucket[bucket < 80]

In [ ]:
bucket.max()

In [ ]:
plt.close('all')

In [ ]:
len(gold_ids)

In [ ]:
sns.set_context('paper')

In [ ]:
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in
# pandas 2.0; squeezing the column axis afterwards yields the same Series.
bucket2 = pd.read_csv("angle_std_bucket.csv", header=None).squeeze("columns")

In [ ]:
bins = np.arange(0, 22, 1)

In [ ]:
# Two-panel figure: left, histogram of the mean fan angle deltas in `bucket`
# (NaN entries removed); right, histogram of the values in `bucket2`.
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases;
# switching to `histplot` would change the look of the figure, so it is kept.
fig, axes = plt.subplots(constrained_layout=True, ncols=2, figsize=(8, 3))

valid_deltas = bucket[~np.isnan(bucket)]
sns.distplot(valid_deltas, bins=np.arange(-40, 41, 2), kde=False, ax=axes[0])
ax = axes[0]
ax.set_title('Histogram of deltas between science team\nand volunteer mean fan directions.')
ax.set_xlabel("Delta mean wind direction per Planet Four tile")
ax.set_ylabel("Bin Counts")

sns.distplot(bucket2, kde=False, bins=bins, ax=axes[1])
ax = axes[1]
ax.set_title("Histogram of angular STD for merged fan clusters")
ax.set_xlabel("Fan angle standard deviation per cluster [deg]")
ax.set_ylabel("Bin Counts")

for ax in axes:
    ax.grid()
plt.savefig("/Users/klay6683/Documents/latex_docs/p4_paper1/figures/gold_mean_fan_deltas_histo.pdf")

In [ ]:
from scipy.stats import circstd

In [ ]:
circstd?

In [ ]:
from scipy.stats import circstd

In [ ]:

rest


In [ ]:
### 2 windows side by side. before and after clustering
# NOTE(review): `p4img` and `clustering` are not defined in the visible part of
# this notebook — confirm where they come from before running this cell.
# create plot window
fig, axarr = plt.subplots(ncols=2, figsize=(12,5))
fig.tight_layout()
axes = axarr.flatten()

# Only the right panel gets the tile image here.
p4img.show_subframe(ax=axes[1])

# remove pixel coord axes
for panel in axes:
    panel.axis('off')

# citizen stuff
p4img.plot_fans(ax=axes[0])
axes[0].set_title('Citizen Markings')
# NOTE: `db` is rebound from the io.DBManager to the clustering result here.
db = clustering(p4img.get_fans(), axes[1],
                eps=7,
                min_samples=5, fans=True, ls='-')
axes[1].set_title('All citizens clusters (science team dashed lines)')

db = clustering(golddata, axes[1],
                min_samples=2,
                eps=11, fans=True, ls='--')
# Save through the figure object; a bare `savefig` is never imported here.
fig.savefig('MDAP_clustering1.png',dpi=100)

In [ ]:
# Plot the fan markings of the current tile and save them as EPS.
# NOTE(review): `p4img` is not defined in the visible part of this notebook.
p4img.plot_fans()
# Use the `plt.` prefix like the rest of the notebook; bare `title` and
# `savefig` are never imported here.
plt.title(p4img.imgid)
plt.axis('off')
plt.savefig('/Users/maye/Dropbox/src/planet4_paper1/images/fan_markings.eps', dpi=150)

In [ ]:
# Two stacked panels: clusters of all fan markings (top) and clusters of the
# gold data (bottom), each drawn over the tile image.
fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(12,10))
axes = ax.flatten()

# Put the tile image into both panels.
for i, panel in enumerate(axes):
    p4img.show_subframe(ax=panel)

# citizen stuff
db = clustering(p4img.get_fans(), axes[0], eps=7, min_samples=5, fans=True)
axes[0].set_title('All citizens clusters (including science team)')

# gold stuff
db = clustering(golddata, axes[1], min_samples=1, eps=7, fans=True)
axes[1].set_title('Gold data clusters')

In [ ]:
### single window, after clustering
# NOTE(review): `p4img` and `clustering` are not defined in the visible part of
# this notebook — confirm where they come from before running this cell.
# create plot window
fig, ax = plt.subplots(figsize=(12,9))
axes = ax

# Show the tile image (the former one-element loop did exactly this once).
p4img.show_subframe(ax=axes)

# citizen stuff
db = clustering(p4img.get_fans(), axes,
                eps=7,
                min_samples=5, fans=True)
plt.axis('off')
plt.savefig('/Users/maye/Dropbox/src/planet4_paper1/images/fans_clustered.eps',bbox_inches='tight',dpi=150)

In [ ]:
# Use the `plt.` prefix like the rest of the notebook; bare `subplots` is
# never imported here.
fig, axes = plt.subplots()
# gold stuff
# NOTE(review): this call passes fans=/blotches= while `plot_gold` calls
# `gold_star_plotter` with kind=...; the function is not defined in the
# visible notebook, so confirm which signature is current.
gold_star_plotter(p4img, axes, fans=True, blotches=False)
axes.set_title('Science team markings')

In [ ]:
# Cluster labels cast to int, and the set of distinct labels.
# NOTE(review): `db` must be the object returned by `clustering(...)` in an
# earlier cell, not the io.DBManager that is also bound to `db` further up —
# confirm the cell execution order.
labels = db.labels_.astype('int')
unique_labels = set(labels)
unique_labels

In [ ]:
# Positions of all entries whose cluster label is 2.
# Use the `np.` prefix like the rest of the notebook; bare `argwhere` is
# never imported here.
label_members = [index[0] for index in np.argwhere(labels==2)]
label_members

In [ ]:
ellipse_cols

In [ ]:
# Select, by position, the rows listed in `label_members` and keep only the
# `ellipse_cols` columns.
# NOTE(review): `ellipse_cols` is not defined in the visible notebook — confirm
# where it comes from.
# NOTE(review): `label_members` are positions in whatever frame was clustered
# last; presumably that was `golddata` — verify against the clustering cell.
data = golddata
ellipsedata = data[ellipse_cols].iloc[label_members]
ellipsedata

In [ ]:
# Draw every row of `ellipsedata` as a `markings.Blotch` artist on empty axes.
fig, ax = plt.subplots()
for i in range(len(ellipsedata)):
    # One Blotch per row, built from that row's values.
    blotch = markings.Blotch(ellipsedata.iloc[i])
    ax.add_artist(blotch)

# Presumably sets the axes limits to the tile size — verify in `markings`.
markings.set_subframe_size(ax)

# meandata = ellipsedata.mean(axis=0)
# meandata